//Set up KLIPS data set - merge individual level and household level data into one file

//NOTE: Must download KLIPS raw individual level dataset ("klips_ind.dta") and KLIPS raw household level dataset ("eklips`wave'h.dta") where `wave' refers to the survey wave.
//This code then cleans and sets up the dataset for later analysis.

// Open the raw person-level KLIPS file
use Data\Raw\klips_ind.dta, clear

// Discard variables that are not used later in this script
drop sample98-hwaveent
drop jobclass-jobtype
drop version

// hmem01 ... hmem22: build the list of names, then drop them in one command
local hmem_all
forvalues k = 1/22 {
	local kk : display %02.0f `k'
	local hmem_all `hmem_all' hmem`kk'
}
drop `hmem_all'
// Rename the wave-specific person weights to _<kind>weight_pWW / _l<kind>weight_pWW so that the
// later reshape can use _weight_, _lweight_, _sweight_, ... as stubs.
// Waves 02-22 carry a "_c" and an "_l" version of each weight.
forvalues w = 2/22 {
	local ww : display %02.0f `w'
	rename (w`ww'p_c w`ww'p_l) (_weight_p`ww' _lweight_p`ww')

	// The sw* and nw* weights are renamed only in waves where they are present
	foreach pre in s n {
		capture confirm variable `pre'w`ww'p_c
		if _rc == 0 {
			rename (`pre'w`ww'p_c `pre'w`ww'p_l) (_`pre'weight_p`ww' _l`pre'weight_p`ww')
		}
	}
}

// Waves 01, 12 and 21 have a single weight each (w01p, sw12p, nw21p); copy it into both
// the cross-sectional and the longitudinal slot so these waves have the same pair of variables
foreach v in _weight_p01 _lweight_p01 {
	gen `v' = w01p
}
foreach v in _sweight_p12 _lsweight_p12 {
	gen `v' = sw12p
}
foreach v in _nweight_p21 _lnweight_p21 {
	gen `v' = nw21p
}

// Drop all string variables. The list is stored in a local first and the drop is skipped
// when it is empty, so that -drop- is never called without a varlist.
ds, has(type string)
local strvars `r(varlist)'
if "`strvars'" != "" {
	drop `strvars'
}

// Move the wave tag from the front of each variable name to the back:
// pWWxxxx -> _xxxx_pWW (waves 01-22) and paWWxxxx -> _xxxx_paWW (waves 11-22),
// so that the part before the tag can serve as a reshape stub.
forvalues w = 1/22 {
	local ww : display %02.0f `w'
	rename p`ww'* _*_p`ww'
}
forvalues w = 11/22 {
	local ww : display %02.0f `w'
	rename pa`ww'* _*_pa`ww'
}

// Choose which variables to keep and use.
//   keeplist  - every existing variable to retain (identifiers, survey items by wave, weights)
//   keeplist2 - the reshape stubs (_NNNN_) of the survey items; read by the reshape step below
local keeplist pid hhid* orghid98 orghid09 orghid18
local keeplist2

// Four-digit item codes of the survey variables used in the analysis
local items 0101 0107 0110 0121 0202 0314 0315 0317 0340 0350 0401 0402 0403 0405 ///
	1004 1006 1003 1012 1011 1642 5204 9001 2501 6301 6302 5803 5501 9071 9074 9075 ///
	6504 6505 6508 6611 6612 4201 4301 4302 4303 4304 4305 4311 4312 4313 4314 4315 ///
	4316 4317 4318 4319 4321 4322 4401 4402 9072 0501 0601 0605 0602 0611 0612 0613 0508

foreach n of local items {
	// An item is not necessarily present in every wave: keep only the waves that exist
	forvalues w = 1/22 {
		local ww : display %02.0f `w'
		capture confirm variable _`n'_p`ww'
		if !_rc {
			local keeplist `keeplist' _`n'_p`ww'
		}
	}
	// The stub is recorded once per item, regardless of how many waves were found
	local keeplist2 "`keeplist2' _`n'_"
}

// Weights: whenever a wave has a given weight, keep it together with its l-prefixed twin
forvalues w = 1/22 {
	local ww : display %02.0f `w'
	foreach kind in weight sweight nweight {
		capture confirm variable _`kind'_p`ww'
		if !_rc {
			local keeplist `keeplist' _`kind'_p`ww' _l`kind'_p`ww'
		}
	}
}

keep `keeplist'

// Reshape from one row per person to one row per person and wave. The wave tag that
// follows each stub ("p01" ... "p22") is stored in the string variable year.
reshape long "`keeplist2'" _weight_ _lweight_ _sweight_ _lsweight_ _nweight_ _lnweight_, i(pid) j(year, string)

// Translate the wave tag into the calendar year: wave WW corresponds to 1997 + WW
// (p01 -> 1998, ..., p22 -> 2019)
gen years = .
forvalues w = 1/22 {
	local ww : display %02.0f `w'
	replace years = 1997 + `w' if year == "p`ww'"
}
drop year

// Intermediate person-year file, read again when the household data are merged in
save  Data\Raw\klips_indhid_clean.dta, replace

clear

// Merge with household level data

// For each wave: load the raw household file, add the survey year, keep the variables
// needed later, strip the wave number from the item names and save a trimmed copy.
// The local waves is also read by the merge loop and by the clean-up at the end of the file.
local waves 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22

foreach wave of local waves {
	use  Data\Raw\eklips`wave'h.dta, clear
	sort hhid`wave'
	gen years = `wave'+1997

	// Candidate household items for this wave: hWW0361-hWW0375, hWW0221-hWW0235,
	// plus hWW1101, hWW1201, hWW1501 and hWW1502
	local masterlist
	forvalues q = 361/375 {
		local masterlist `masterlist' h`wave'0`q'
	}
	forvalues q = 221/235 {
		local masterlist `masterlist' h`wave'0`q'
	}
	local masterlist `masterlist' h`wave'1101 h`wave'1201 h`wave'1501 h`wave'1502

	// Keep the identifiers plus whichever candidates exist in this wave.
	// -exact- stops a missing item from being "found" through an abbreviation of a longer name.
	local hh_keep years hhid`wave' orghid*
	foreach v of local masterlist {
		capture confirm variable `v', exact
		if !_rc {
			local hh_keep `hh_keep' `v'
		}
	}
	keep `hh_keep'

	// hWWxxxx -> hxxxx, so the same item has the same name in every wave file
	rename h`wave'* h*
	drop if missing(hhid`wave')
	duplicates drop
	save Data\Raw\eklips`wave'h_use3.dta, replace
}

// Start from the person-year file and attach the trimmed household files one wave at a time.
// Each merge matches on that wave's household id, orghid98 and years; with -update-,
// values from the household file fill in variables that are still missing in memory.
use  Data\Raw\klips_indhid_clean, clear
foreach wave of local waves{
	merge m:1 hhid`wave' orghid98 years using Data\Raw\eklips`wave'h_use3, gen(_m`wave') update
}
drop _m01-_m22

// Give the household-roster items readable names:
// h0361-h0375 become age1-age15 and h0221-h0235 become pid1-pid15
forvalues k = 1/15 {
	rename h0`=360 + `k'' age`k'
}
forvalues k = 1/15 {
	rename h0`=220 + `k'' pid`k'
}

// Save merged dataset
save Data\klips_hh_merged, replace

// Clean merged household-individual level data
use Data\klips_hh_merged, clear

// Drop if missing personal identifier
drop if missing(pid)

// RECODE and CLEAN
// Work hours, following the KLIPS formula:
//   _1003_==2              -> _1004_
//   _1003_==1 & _1011_==1  -> _1006_
//   _1003_==1 & _1011_==2  -> _1006_ + _1012_
recode _1004_ _1006_ _1012_ (-1=.)
// -missing- makes the row total missing when BOTH components are missing; without it
// rowtotal() returns 0, which would record unknown hours as zero hours worked
egen worktime = rowtotal(_1006_ _1012_) if _1003_==1 & _1011_==2, missing
replace worktime = _1004_ if _1003_==2
replace worktime = _1006_ if _1003_==1 & _1011_==1
// Drop impossible hours (a week has 168 hours)
replace worktime = . if worktime > 168 & !missing(worktime)

// Wages: _1642_ is divided by 4 and then by weekly hours to give an hourly figure
replace _1642_ = . if _1642_ == -1
replace _1642_ = _1642_ / 4
replace _1642_ = _1642_ / worktime
rename _1642_ wage
gen logwage = log(wage)

// Gender: _0101_ == 2 is coded as female, == 1 as male; anything else stays missing
gen female = cond(_0101_ == 2, 1, cond(_0101_ == 1, 0, .))
gen gender = cond(female == 1, "women", cond(female == 0, "men", ""))

// Age and its square
rename _0107_ age
gen agesq = age ^2

// 5-year age groups from "20-24" to "60-64"; ages outside 20-64 keep an empty label
gen agegroup = ""
forvalues lo = 20(5)60 {
	local hi = `lo' + 4
	replace agegroup = "`lo'-`hi'" if inrange(age, `lo', `hi')
}

// Education level, with -1 treated as missing
rename _0110_ educ
replace educ = . if educ == -1

// Years of education: level codes 2,3,...,9 map to 0, 6, 9, 12, 14, 16, 18, 19 years
gen edu = .
local yrs 0 6 9 12 14 16 18 19
forvalues lvl = 2/9 {
	local y : word `=`lvl' - 1' of `yrs'
	replace edu = `y' if educ == `lvl'
}

// College tag (binary): level 7 or above, missing when the level is missing
gen college = (educ >= 7) if !missing(educ)

// Industry, occupation, workplace type, work status and union status: -1 is set to
// missing. Residence and main activity are renamed without recoding.
recode _0340_ _0350_ _0401_ _0314_ _2501_ (-1=.)
rename (_0340_ _0350_ _0121_ _0202_ _0401_ _0314_ _2501_) (ind occ residence main workplace status union)

// Number of people at company, as a bracket code.
// Start from the reported bracket (_0403_); where an exact head count (_0402_) of at least 1
// is available, the bracket is derived from it instead and overrides the reported one.
gen employees = _0403_ if _0403_ != .
local cuts 1 5 10 30 50 70 100 300 500 1000 100000
forvalues b = 1/10 {
	local lo : word `b' of `cuts'
	local hi : word `=`b' + 1' of `cuts'
	// bracket b covers head counts in [lo, hi)
	replace employees = `b' if _0402_ >= `lo' & _0402_ < `hi'
}
// Bracket code 11 is discarded, and so is -1: _0403_ is not recoded above, so without this
// a -1 in _0403_ would survive as a bracket value whenever _0402_ gives no valid count
replace employees = . if employees == 11 | employees == -1

// Industry classifications
// Builds the string variable ind_class from the numeric industry code ind.
// The statements run top to bottom and each -replace- overwrites earlier assignments, so a
// code that appears in several lists ends up in the LAST category that lists it. Examples:
//   101 -> "Manufacturing" (also listed under agriculture & mining)
//   744 -> "Professional, scientific, and technical" (also under agriculture & mining, utilities)
//   517 -> "Wholesale and retail trade" (also under utilities)
//   633, 14 -> "Business facilities management and support" (633 is also inside 601-639; 14 is also under agriculture & mining)
//   741, 742, 759 -> "Education" (also listed in earlier categories)
//   873, 883, 889 -> "Arts, sports, and recreation"; 749 -> "Membership organizations, ..."
// NOTE(review): these overlaps may be intentional (e.g. codes from different classification
// revisions) or copy-paste leftovers - confirm against the industry code book.
// Codes not listed anywhere, and missing ind, leave ind_class as the empty string.
gen ind_class = "Agriculture & mining" if ind == 11 | ind == 12 | ind == 13 | ind == 14 | ind == 15 | ind ==20 | ind == 51 | ind == 52 | ind == 101 | ind == 102 | ind == 111 | ind == 103 | ind == 112 | ind == 121 | ind == 122 | ind == 744
replace ind_class =  "Manufacturing" if ind == 151 | ind == 152 | ind == 153 | ind == 154 | ind == 155 | ind == 160 | ind == 171 | ind == 172 | ind == 179 | ind == 173 | ind == 174 | ind == 181 | ind == 182 | ind == 191 | ind == 192 | ind == 193 | ind == 201 | ind == 202 | ind == 211 | ind == 212 | ind == 222 | ind == 223 | ind == 101 | ind == 231 | ind == 232 | ind == 233 | ind == 241 | ind == 243 | ind == 369 | ind == 244 | ind == 242 | ind == 251 | ind == 319 | ind == 252 | ind == 261 | ind == 262 | ind == 263 | ind == 269 | ind == 271 | ind == 272 | ind == 273 | ind == 281 | ind == 294 | ind == 289 | ind == 321 | ind == 300 | ind == 311 | ind == 322 | ind == 323 | ind == 331 | ind == 332 | ind == 333 | ind == 334 | ind == 312 | ind == 314 | ind == 313 | ind == 315 | ind == 295 | ind == 292 | ind == 293 | ind == 291 | ind == 361 | ind == 341 | ind == 342 | ind == 343 | ind == 351 | ind == 352 | ind == 353 | ind == 359 | ind == 369
// NOTE(review): the label below is "Utilities " WITH a trailing space; any later comparison
// against "Utilities" (no space) would not match - confirm whether the space is intended.
replace ind_class = "Utilities " if (ind >= 401 & ind <= 403) | ind == 410 | ind == 901 | ind == 902 | ind == 371 | ind == 372 | ind == 517 | ind == 743 | ind == 744
replace ind_class = "Construction" if ind >= 451 & ind <= 465
replace ind_class = "Wholesale and retail trade" if ind == 501 | ind == 502 | ind == 503 | ind == 511 | ind == 512 | ind == 513 | ind == 514 | ind == 518 | ind == 515 | ind == 516 | ind == 517 | ind == 519 | ind == 521 | ind == 522 | ind == 525 | ind == 526 | ind == 524 | ind == 504 | ind == 523 | ind == 527 | ind == 528
replace ind_class = "Transportation and storage" if ind >= 601 & ind <= 639
replace ind_class = "Accommodation and food service" if ind >= 551 & ind <= 552
replace ind_class = "Information and communication" if ind == 221 | ind == 722 | ind == 871 | ind == 221 | ind == 873 | ind == 872 | ind == 641 | ind == 642 | ind == 721 | ind == 723 | ind == 729 | ind == 724 | ind == 729 | ind == 759 | ind == 881
replace ind_class = "Finance and insurance" if ind >= 659 & ind <= 672
replace ind_class = "Real estate" if ind >= 701 & ind <= 702
replace ind_class = "Professional, scientific, and technical" if ind == 731 | ind == 732 | ind == 741  | ind == 745  | ind == 742  | ind == 744  | ind == 852  | ind == 749  | ind == 746  | ind == 750
replace ind_class = "Business facilities management and support" if ind == 741 | ind == 759 | ind == 903 | ind == 14 | ind ==751 | ind == 633
replace ind_class = "Public administration and defense" if ind >= 761 & ind <= 765
replace ind_class = "Education" if ind == 801 | ind == 802 | ind == 803 | ind == 804 | ind == 809 | ind == 883 | ind == 889 | ind == 741 | ind == 742 | ind == 759
replace ind_class = "Human health and social work" if ind == 851 | ind == 861 | ind == 862
replace ind_class = "Arts, sports, and recreation" if ind == 873 | ind == 882 | ind == 889 | ind == 883
replace ind_class = "Membership organizations, repair, other personal services" if ind == 911 | ind == 912 | ind == 919 | ind == 921 | ind == 922 | ind == 923 | ind == 931 | ind == 749 | ind == 939
replace ind_class = "Activities of households as employers" if ind == 950
replace ind_class = "Activities of extraterritorial organizations and bodies" if ind == 990

// Marital status: currently married (_5501_ == 2); -1 is treated as missing
replace _5501_ = . if _5501_ == -1
gen married = cond(missing(_5501_), _5501_, _5501_ == 2)

// Marital status: ever married up to and including this point
// (codes 2-5: married, separated, divorced, widowed); code 1 counts as never married
gen evermarried = cond(inlist(_5501_, 2, 3, 4, 5), 1, cond(_5501_ == 1, 0, .))

// Children: total is the sum of the male and female counts (missing if either is missing)
// NOTE(review): unlike _5501_, these counts are not recoded from -1 - confirm whether -1 can occur
rename (_9074_ _9075_) (child_male child_female)
gen children = child_male + child_female

// Children high school or younger (from HH dataset): h1501 == 1 -> 1, h1501 == 2 -> 0
gen childreninhousehold = cond(h1501 == 1, 1, cond(h1501 == 2, 0, .))
gen numchildren = h1502

// Ages of the OTHER household members: in each of the 15 roster slots, blank out the age
// when the slot's pid is the respondent's own pid, and collect the age variables on the way
local other_ages
forvalues m = 1/15 {
	replace age`m' = . if pid`m' == pid
	local other_ages `other_ages' age`m'
}

// Youngest and oldest other household member
egen hh_minage = rowmin(`other_ages')
egen hh_maxage = rowmax(`other_ages')

// Tags (1 or missing) for household members of different ages.
// A missing hh_minage never satisfies "<=" because missing sorts above every number.
gen hh_child = 1 if hh_minage <= 18
gen hh_under5 = 1 if hh_minage <= 5
gen hh_elder = 1 if hh_maxage > 65 & hh_maxage < .

// Declare the person-year panel so that L1. refers to the same person one year earlier
xtset pid years

// Tag for a year in which any child is born: youngest other member is aged 0, or aged 1
// when the previous year's minimum age was not 0 (includes a missing previous year)
gen hh_childborn = 1 if (hh_minage==0|(hh_minage==1&L1.hh_minage!=0))
gen hh_childborn_strict = 1 if (hh_minage==0)

// Tag for the year the first child is born:
// =1 if the youngest other member is aged 0 this year and last year nobody under 18 was
// present (or last year is not observed)
gen hh_firstchildborn = 1 if (hh_childborn_strict==1 & (L1.hh_minage>=18|missing(L1.hh_minage)))
// If several years qualify for a person, only the earliest keeps the tag; every other
// person-year (including persons with no qualifying year) is set to 0
gen temp_yeartag = years if hh_firstchildborn == 1
bysort pid: egen temp_firstyear = min(temp_yeartag), missing
replace hh_firstchildborn = 0 if temp_firstyear != years
drop temp*

// Tag for first year married: code 2 this year and code 1 the year before.
// The panel order is re-established first as a precaution, because the bysort/egen step
// above may leave the data in a different order and lag operators need pid-years order.
// The full variable name _5501_ is used so the line does not depend on abbreviation.
sort pid years
gen firstyearmarried = 1 if (_5501_==2 & L1._5501_==1)

// Person-level flag: 1 if a child (hh_child) is present in the household in any observed year
bysort pid: egen everchild = max(hh_child)

// Person reports having given birth; -1 in _9071_ is treated as missing
replace _9071_ = . if _9071_ == -1
// Per the code book the item is coded differently in the first survey year (1998):
// a birth is flagged by code 1 in 1998 and by code 2 in later years
gen h_yearbirthindata = years if (_9071_ == 2 & years > 1998) | (_9071_ == 1 & years == 1998)
// Earliest year with a reported birth, constant within person
bysort pid: egen yearbirthindata = min(h_yearbirthindata), missing
// 1 from that year onwards, 0 before it, missing for persons with no reported birth
gen havegivenbirth = (years >= yearbirthindata) if !missing(yearbirthindata)

// Dependent variables: working, earnings and hours
// NOTE(review): main is not recoded from -1 in this script, so a -1 would count as
// "not working" here - confirm whether that code occurs

// Working as main activity (main == 1); missing when main is missing
gen working_main = (main == 1) if !missing(main)
// Working at all, even if not the main activity (main between 1 and 4)
gen working = inrange(main, 1, 4) if !missing(main)

// Earnings: zero for non-workers, otherwise hourly wage times weekly hours
gen earnings = cond(working == 0, 0, wage * worktime)
gen loghours = log(worktime)

// Regular-work indicators
// (1) EAPS definition: seven 0/1 flags for non-regular forms of employment (foe11-foe17);
//     a worker is regular when none of them applies
quietly gen foe11 = (_0501_ == 1) ///
	| (_0501_ == 2 & _0601_ == 1 & _0602_ == 2) ///
	| (_0501_ == 2 & _0601_ == 2 & inlist(_0605_, 1, 2, 3, 4, 5, 6))
label var foe11 "temporary workers"

quietly gen foe12 = (_0315_ == 1)
label var foe12 "part-time workers"

quietly gen foe13 = (_0611_ == 2)
label var foe13 "temporary agency workers"

quietly gen foe14 = (_0611_ == 3)
label var foe14 "subcontracted workers"

quietly gen foe15 = (_0612_ == 1)
label var foe15 "workers in special forms of employment"

quietly gen foe16 = (_0613_ == 1)
label var foe16 "home-based workers"

quietly gen foe17 = (_0508_ == 1)
label var foe17 "daily (short-term) workers"

// Defined only for people who are working; missing otherwise
quietly gen regular1 = (foe11 + foe12 + foe13 + foe14 + foe15 + foe16 + foe17 == 0) if working == 1
label var regular1 "regular workers (EAPS)"

// (2) By work status: status codes 2 and 3 are non-regular
quietly gen regular = !inlist(status, 2, 3) if working == 1
lab var regular "regular workers based on work status"

// (3) By self-declared status: _0317_ == 2 is non-regular
quietly gen regular2 = (_0317_ != 2) if working == 1
lab var regular2 "self-declared regular"
	
	


// Full-time work: _0315_ == 2 -> 1, _0315_ == 1 -> 0; defined only for people who are working
gen fulltime = cond(_0315_ == 2, 1, cond(_0315_ == 1, 0, .)) if working == 1

// Employer size (head count)
rename _0402_ employersize
// -1 is the non-response code used throughout this file. Left in place it would be
// classified as a small employer below (-1 <= 250) and distort any use of employersize.
replace employersize = . if employersize == -1
gen logemployersize = log(employersize)

// Large employer: more than 250 people; missing if size is unknown or the person is not working
gen largeemployer = (employersize > 250) if working == 1 & !missing(employersize)

// Work satisfaction dummies.
// Each dummy is defined only for valid answers: missing values AND the -1 non-response
// code are excluded. Without the -1 guard a non-response would satisfy "<= 2" and be
// counted as satisfied (and would be counted as 0 for the ">=" items).
gen satis_workplace = (_4321_ <= 2) if !missing(_4321_) & _4321_ != -1
gen satis_work      = (_4322_ <= 2) if !missing(_4322_) & _4322_ != -1
// NOTE(review): this item uses ">= 4" where the others use "<= 2" - presumably its scale
// runs in the opposite direction; confirm against the code book
gen satis_job       = (_4301_ >= 4) if !missing(_4301_) & _4301_ != -1
gen satis_earn      = (_4311_ <= 2) if !missing(_4311_) & _4311_ != -1
gen satis_promotion = (_4318_ <= 2) if !missing(_4318_) & _4318_ != -1

// Education/skill matching dummies (answer code 3 or higher), same treatment of -1
gen wellmatched_educ   = (_4401_ >= 3) if !missing(_4401_) & _4401_ != -1
gen wellmatched_skills = (_4402_ >= 3) if !missing(_4402_) & _4402_ != -1

// Number of observed years in which the person is working (working == 1).
// This is one total per person, repeated on every row, not a running count.
bysort pid: egen yearsworking = total(working)

// Rename weight variables according to the sample they apply to (98, 09, 18).
// Full variable names are used throughout so the renames do not depend on abbreviation.
rename (_weight_ _lweight_ _sweight_ _lsweight_ _nweight_ _lnweight_) ///
	(_weight98_ _lweight98_ _weight09_ _lweight09_ _weight18_ _lweight18_)

// Cross-sectional weight for multi-year analysis: rescale each weight to sum to 100 within
// every year, so that no year weighs more than another in a multi-year regression
foreach wv in _weight98_ _weight09_ _weight18_ {
	bysort years: egen temp_tot`wv' = total(`wv')
	gen `wv'_new = 100*`wv'/temp_tot`wv'
}
drop temp_*

// Use the 98 weight before 2009, the 09 weight for 2009-2017 and the 18 weight from 2018 on
gen _weight_ = cond(years < 2009, _weight98__new, cond(years < 2018, _weight09__new, _weight18__new))


// Write the final analysis file
save Data\klips_workingdata_hh, replace

// Remove the intermediate files created by this script: the merged file, the person-year
// file and the trimmed household file of every wave listed in the local waves
erase Data\klips_hh_merged.dta
erase Data\Raw\klips_indhid_clean.dta
foreach w of local waves {
	erase Data\Raw\eklips`w'h_use3.dta
}



